import librosa
import matplotlib.pyplot as plt
import sys
from CQCC.cqcc import cqcc
import scipy.io.wavfile as wav
import soundfile as sf
import os
import numpy as np
import pickle
import argparse
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
import random
# Reference recording used for the first MATLAB-vs-Python comparison.
filename = "./D18_1000001.wav"

# Some Python implementations read sound files with librosa. The feature
# extraction code reads them with soundfile, so soundfile is used here for a
# fair comparison (author's note: both give the same result).
x, fs = sf.read(filename)
x = x.reshape(x.shape[0], 1)  # for one-channel signal

# The same parameters are used in the MATLAB code.
B = 96
fmax = fs/2
fmin = fmax/2**9
d = 16
cf = 19
ZsdD = 'ZsdD'

# The cqcc function returns 7 results. 'CQcc' is the one used for model
# building, as in the baseline paper and in the Python code of the
# model-building paper.
CQcc, LogP_absCQT, TimeVec, FreqVec, Ures_LogP_absCQT, Ures_FreqVec, absCQT = cqcc(x, fs, B, fmax, fmin, d, cf, ZsdD)
print("cqcc_feat:", CQcc.shape)  # number of frames * number of cep
print("cqcc_lpms:", LogP_absCQT.shape)
# Recorded output:
# cqcc_feat: (470, 60) cqcc_lpms: (470, 863)

# Ures_FreqVec is not included in the list of results to compare.
feats_py = [CQcc, LogP_absCQT, TimeVec, FreqVec, Ures_LogP_absCQT, absCQT]
# I ran the cqcc code in MATLAB and saved each result of the cqcc function to a text file.
# 'Ures_FreqVec_test.txt' is excluded from the comparison because the Python version returns nothing in that variable.
# Folder with the MATLAB cqcc results for D18_1000001, one text file per result.
matlab_output_folder = '/Users/asimadnan/Desktop/Mres/ASVspoof-main/CQCC/D18_1000001/'

# Names of the MATLAB result files to load from that folder.
features = [
    'CQcc_test.txt',
    'LogP_absCQT_test.txt',
    'TimeVec_test.txt',
    'FreqVec_test.txt',
    'Ures_LogP_absCQT_test.txt',
    'absCQT_test.txt',
]

# Matches every character that is NOT a digit, '.', '-' or 'e'.
# Written as a raw string so the backslashes reach the regex engine untouched
# and Python does not warn about invalid escape sequences; the pattern text is
# unchanged.
RE = r'[^0-9\.\-e]'
# extract_from_matlab: reads a text file generated by MATLAB and returns its contents as an ndarray.
# plt_heatmap: plots a heatmap of a single feature.
def extract_from_matlab(file_path, pattern=None):
    """Read a comma-separated text file written by MATLAB into an ndarray.

    Every non-blank line becomes one row. Each comma-separated token has the
    characters matched by ``pattern`` removed and is then converted with
    ``float``.

    Args:
        file_path: Path of the text file to read.
        pattern: Regular expression matching the characters to strip from
            each token. Defaults to the module-level ``RE``.

    Returns:
        A NumPy array with one row per non-blank line of the file (an empty
        array when the file holds no data lines).

    Raises:
        ValueError: If a token is not a valid float after stripping.
    """
    if pattern is None:
        pattern = RE
    strip_junk = re.compile(pattern).sub  # compile once instead of per token

    rows = []
    with open(file_path) as input_file:
        for row in input_file:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no values; float('') would raise on it.
            if not row.strip():
                continue
            rows.append([float(strip_junk('', token)) for token in row.split(',')])
    return np.array(rows)
# Plot a single heatmap of one feature.
def plt_heatmap(data, name):
    """Draw a seaborn heatmap of ``data`` titled 'HeatMap of <name>' and show it.

    Args:
        data: Array-like passed directly to ``sns.heatmap``.
        name: Text appended to the plot title.
    """
    sns.heatmap(data)
    heading = 'HeatMap of ' + name
    plt.title(heading)
    plt.show()
# The CQcc and LogP_absCQT matrices are transposed so that their shapes match the features from Python.
# Load every MATLAB result; the two files listed here are transposed after
# loading, the others are used as read.
transposed_files = ('LogP_absCQT_test.txt', 'CQcc_test.txt')
feats_mat = []
for feature in features:
    loaded = extract_from_matlab(matlab_output_folder + feature)
    if feature in transposed_files:
        loaded = loaded.T
    feats_mat.append(loaded)

# Print the shape of each MATLAB/Python pair and whether the shapes agree.
print('MATLAB PYTHON Match')
for mat_feat, py_feat in zip(feats_mat, feats_py):
    same_shape = mat_feat.shape == py_feat.shape
    print('{mat:>12} {pyt:>12} {match}'.format(mat=str(mat_feat.shape), pyt=str(py_feat.shape), match=same_shape))
# Output:
# MATLAB PYTHON Match
# (470, 60) (470, 60) True
# (470, 863) (470, 863) True
# (1, 470) (1, 470) True
# (1, 863) (1, 863) True
# (8059, 470) (8059, 470) True
# (863, 470) (863, 470) True
# Plot all features side by side for comparison: MATLAB in the left column,
# Python in the right column, one row per feature.
# The figure is created once, before the loop, so that all twelve subplots of
# the 6x2 grid land on the same figure instead of one new figure per feature.
plt.figure(figsize=(20, 20))
for idx, feature in enumerate(feats_mat):
    plt.subplot(6, 2, 2 * idx + 1)
    sns.heatmap(feature)
    plt.title('MATLAB - HeatMap of ' + features[idx])
    plt.subplot(6, 2, 2 * idx + 2)
    sns.heatmap(feats_py[idx])
    plt.title('PYTHON - HeatMap of ' + features[idx])
def compare_features(filename, audio_file_location):
    """Compare Python CQCC results for one audio file with saved MATLAB results.

    The audio is loaded with librosa at 16 kHz and passed through ``cqcc``.
    The MATLAB results are read from text files in a folder named after the
    part of ``filename`` before its first '.'. The shape of every
    MATLAB/Python pair is printed, and all pairs are drawn as heatmaps on a
    single figure (MATLAB in the left column, Python in the right column).

    Args:
        filename: Name of the audio file, e.g. 'LA_T_1203196.flac'.
        audio_file_location: Directory that contains the audio file.
    """
    # Python feature extraction.
    # os.path.join works whether or not the directory ends with a separator.
    x, fs = librosa.load(os.path.join(audio_file_location, filename), sr=16000)
    x = x.reshape(x.shape[0], 1)
    B = 96
    fmax = fs/2
    fmin = fmax/2**9
    d = 16
    cf = 19
    ZsdD = 'ZsdD'
    # The sixth result (Ures_FreqVec) is not compared, so it is discarded.
    CQcc, LogP_absCQT, TimeVec, FreqVec, Ures_LogP_absCQT, _, absCQT = cqcc(x, fs, B, fmax, fmin, d, cf, ZsdD)
    feats_py = [CQcc, LogP_absCQT, TimeVec, FreqVec, Ures_LogP_absCQT, absCQT]

    # MATLAB features: a folder named after the file holds one txt file per feature.
    matlab_feature_folder = '/Users/asimadnan/Desktop/Mres/ASVspoof-main/CQCC/' + filename.split('.')[0] + '/'
    features = ['CQcc_test.txt',
                'LogP_absCQT_test.txt',
                'TimeVec_test.txt',
                'FreqVec_test.txt',
                'Ures_LogP_absCQT_test.txt',
                'absCQT_test.txt']
    feats_mat = []
    for feature in features:
        loaded = extract_from_matlab(matlab_feature_folder + feature)
        # These two results are transposed after loading.
        if feature in ('LogP_absCQT_test.txt', 'CQcc_test.txt'):
            loaded = loaded.T
        feats_mat.append(loaded)

    # Shape report for every MATLAB/Python pair.
    print('MATLAB PYTHON Match')
    for mat_feat, py_feat in zip(feats_mat, feats_py):
        match = (mat_feat.shape == py_feat.shape)
        print('{mat:>12} {pyt:>12} {match}'.format(mat=str(mat_feat.shape), pyt=str(py_feat.shape), match=match))

    # One figure for the whole 6x2 grid; creating it inside the loop would
    # open a new, mostly empty figure for every feature.
    plt.figure(figsize=(20, 20))
    for idx, (mat_feat, py_feat) in enumerate(zip(feats_mat, feats_py)):
        plt.subplot(6, 2, 2 * idx + 1)
        sns.heatmap(mat_feat)
        plt.title('MATLAB - HeatMap of ' + features[idx])
        plt.subplot(6, 2, 2 * idx + 2)
        sns.heatmap(py_feat)
        plt.title('PYTHON - HeatMap of ' + features[idx])
train_path = '/Users/asimadnan/Desktop/Mres/ASVSPOOF_DATA/LA/ASVspoof2019_LA_train/flac/'
all_training_files = os.listdir(train_path)

# Draw five random training files; the result is not stored.
random.choices(all_training_files,k=5)
# Recorded output:
# ['LA_T_1203196.flac', 'LA_T_8682959.flac', 'LA_T_3316254.flac', 'LA_T_1846330.flac', 'LA_T_4000361.flac']

# Saved random file names (the ones from the recorded sample above). The list
# must be defined: the comparisons below and later code index into it.
randomom_files = ['LA_T_1203196.flac',
                  'LA_T_8682959.flac',
                  'LA_T_3316254.flac',
                  'LA_T_1846330.flac',
                  'LA_T_4000361.flac']

# Compare MATLAB and Python features for each saved file, in list order.
for audio_name in randomom_files:
    compare_features(audio_name, train_path)

# Recorded output for randomom_files[0]:
# MATLAB PYTHON Match
# (389, 60) (389, 60) True
# (389, 863) (389, 863) True
# (1, 389) (1, 389) True
# (1, 863) (1, 863) True
# (8059, 389) (8059, 389) True
# (863, 389) (863, 389) True
#
# Recorded output for randomom_files[1]:
# MATLAB PYTHON Match
# (329, 60) (329, 60) True
# (329, 863) (329, 863) True
# (1, 329) (1, 329) True
# (1, 863) (1, 863) True
# (8059, 329) (8059, 329) True
# (863, 329) (863, 329) True
#
# Recorded output for randomom_files[2]:
# MATLAB PYTHON Match
# (225, 60) (225, 60) True
# (225, 863) (225, 863) True
# (1, 225) (1, 225) True
# (1, 863) (1, 863) True
# (8059, 225) (8059, 225) True
# (863, 225) (863, 225) True
#
# Recorded output for randomom_files[3]:
# MATLAB PYTHON Match
# (414, 60) (414, 60) True
# (414, 863) (414, 863) True
# (1, 414) (1, 414) True
# (1, 863) (1, 863) True
# (8059, 414) (8059, 414) True
# (863, 414) (863, 414) True
#
# Recorded output for randomom_files[4]:
# MATLAB PYTHON Match
# (214, 60) (214, 60) True
# (214, 863) (214, 863) True
# (1, 214) (1, 214) True
# (1, 863) (1, 863) True
# (8059, 214) (8059, 214) True
# (863, 214) (863, 214) True
# Heatmap of the element-wise difference (MATLAB minus Python) for each feature.
for feat, mat_feat, py_feat in zip(features, feats_mat, feats_py):
    difference = mat_feat - py_feat
    plt_heatmap(difference, feat)
# Repeatability check: if we extract features from the same file multiple times, do they change, or are they always the same?
def extract_twice_python(filepath):
    """Run the Python CQCC extraction two times on the same audio file.

    The file is loaded with librosa at 16 kHz and passed through ``cqcc``
    from scratch on each pass, so the two results can be compared.

    Args:
        filepath: Full path of the audio file.

    Returns:
        A tuple ``(first, second)``. Each element is the list
        ``[CQcc, LogP_absCQT, TimeVec, FreqVec, Ures_LogP_absCQT, absCQT]``
        produced by one pass.
    """
    def _single_pass():
        # Load the audio and reshape it to a single column.
        signal, rate = librosa.load(filepath, sr=16000)
        signal = signal.reshape(signal.shape[0], 1)
        top_freq = rate / 2
        low_freq = top_freq / 2**9
        (cq, log_power, time_vec, freq_vec,
         ures_log_power, _ures_freq_vec, abs_cqt) = cqcc(
            signal, rate, 96, top_freq, low_freq, 16, 19, 'ZsdD')
        # The sixth result (Ures_FreqVec) is left out of the returned list.
        return [cq, log_power, time_vec, freq_vec, ures_log_power, abs_cqt]

    first = _single_pass()
    second = _single_pass()
    return first, second
# Extract the Python features twice from the first saved file.
first_file = train_path + randomom_files[0]
py_1, py_2 = extract_twice_python(first_file)

# Heatmap of the difference between the two passes, one per feature.
for feat, pass_a, pass_b in zip(features, py_1, py_2):
    plt_heatmap(pass_a - pass_b, feat)

# Number of entries that differ between the two passes, one line per feature.
for feat, pass_a, pass_b in zip(features, py_1, py_2):
    print(np.count_nonzero(pass_a - pass_b))
# Output: 0 0 0 0 0 0
def extract_twice_matlab(filename):
    """Load two saved MATLAB result sets for the same audio file.

    The first set is read from the folder named after the part of
    ``filename`` before its first '.', the second from the folder with the
    same name plus the suffix '_dup'.

    Args:
        filename: Name of the audio file, e.g. 'LA_T_1846330.flac'.

    Returns:
        A tuple ``(feats_mat_1, feats_mat_2)``; each is a list with one array
        per result file, in the order CQcc, LogP_absCQT, TimeVec, FreqVec,
        Ures_LogP_absCQT, absCQT.
    """
    base = '/Users/asimadnan/Desktop/Mres/ASVspoof-main/CQCC/' + filename.split('.')[0]
    result_files = (
        'CQcc_test.txt',
        'LogP_absCQT_test.txt',
        'TimeVec_test.txt',
        'FreqVec_test.txt',
        'Ures_LogP_absCQT_test.txt',
        'absCQT_test.txt',
    )
    # These two results are transposed after loading.
    needs_transpose = ('LogP_absCQT_test.txt', 'CQcc_test.txt')

    def _load_folder(folder):
        loaded = []
        for result_file in result_files:
            array = extract_from_matlab(folder + result_file)
            loaded.append(array.T if result_file in needs_transpose else array)
        return loaded

    original_run = _load_folder(base + '/')
    duplicate_run = _load_folder(base + '_dup/')
    return original_run, duplicate_run
# Load the two saved MATLAB result sets for one file.
filename = 'LA_T_1846330.flac'
mat_1, mat_2 = extract_twice_matlab(filename)

# Heatmap of the difference between the two MATLAB result sets, per feature.
for idx, feat in enumerate(features):
    plt_heatmap((mat_1[idx] - mat_2[idx]), feat)

# Number of entries that differ between the two MATLAB result sets.
# This must use mat_1/mat_2; counting py_1/py_2 here would only repeat the
# Python check and say nothing about the MATLAB results.
for idx, feat in enumerate(features):
    print(np.count_nonzero(mat_1[idx] - mat_2[idx]))
# Recorded output of the original cell, which counted py_1 - py_2 instead of
# mat_1 - mat_2 (re-run to confirm the MATLAB counts):
# 0 0 0 0 0 0